*
* Purpose: prepare matching data

** Read raw matching output -----------------------------------------------------
* Load the delimited file named by the global data_matching_raw and give the
* SP-DA and SR-DA columns the short names used in the rest of this script.
import delimited "${data_matching_raw}", clear
rename mu_spda mu4
rename mu_srda mu_sr

** Relabel match data -----------------------------------------------------------
	rename sid id

	* The text "None" is recoded to missing before the column is converted to
	* numeric (presumably it marks students without a match; confirm against the
	* producer of the raw file). A column that contains no "None" at all is
	* already read as numeric by import delimited; comparing it with a string
	* would abort with a type mismatch, so such columns are left untouched.
	foreach match of varlist mu1 mu2 mu3 mu4 mu_sr {
		capture confirm string variable `match'
		if !_rc {
			replace `match' = "" if `match' == "None"
			destring `match', replace
		}
	}

	label variable id "Student id"
	label variable mu_sr "Contract id (SR-DA)"
	label variable mu1 "Contract id - heuristic 1"
	label variable mu2 "Contract id - heuristic 2"
	label variable mu3 "Contract id - heuristic 3"
	label variable mu4 "Contract id (SP-DA)"
	
	* Merge information on all the students (incl. those whose match is fixed) -
	* Students found only in the students file (_merge == 2) have no row in the
	* matching output; they are flagged in fixed_real and every matching column
	* is filled with the contract recorded for them in the students file.
	merge 1:m id using "${data_students}"
	gen fixed_real = _merge == 2
	
	foreach match of varlist mu_sr mu1 mu2 mu3 mu4 {
		replace `match' = contract_id if _merge == 2
	}
	
	* Realized outcome: only filled where rank_of_admission is non-missing.
	* The id copies reuse the storage type of their source because a plain gen
	* creates a float, which rounds integers above 16,777,216.
	local contract_type : type contract_id
	local program_type : type program_id
	gen `contract_type' contract_id_real = contract_id if rank_of_admission != .
	gen `program_type' program_id_real = program_id if rank_of_admission != .
	gen program_county_real = program_county if rank_of_admission != .
	rename rank_of_admission rank_of_admission_real
	
	* Everything not listed here (including _merge and the raw student
	* variables) is discarded.
	keep id mu_sr mu1 mu2 mu3 mu4 *_real

	* Merge original funding-level ---------------------------------------------
	* Look up the realized contract in the applications file to pick up its
	* funding. The temporary key reuses the storage type of contract_id_real: a
	* plain gen would create a float and round ids above 16,777,216, so those
	* rows could silently fail to match.
	local contract_type : type contract_id_real
	gen `contract_type' contract_id = contract_id_real
	
	* keep(1 3): keep every current row, matched or not; rows that exist only
	* in the applications file are not added.
	merge 1:1 id contract_id using "${data_applications}", keep(1 3)
	gen funding_real = funding
	
	keep id mu_sr mu1 mu2 mu3 mu4 *_real
	
	* Merge contract-specific version ------------------------------------------
	* Attach the application record of the SR-DA contract (mu_sr) and store its
	* attributes with the suffix _orig. The merge key and the id copies reuse
	* the storage type of their source: a plain gen would create a float, which
	* rounds integers above 16,777,216 and can break the match.
	local contract_type : type mu_sr
	gen `contract_type' contract_id = mu_sr
	
	* keep(1 3): keep every current row, matched or not; rows that exist only
	* in the applications file are not added.
	merge 1:1 id contract_id using "${data_applications}", keep(1 3)
	
	local contract_type : type contract_id
	local program_type : type program_id
	gen funding_orig = funding
	gen `contract_type' contract_id_orig = contract_id
	gen `program_type' program_id_orig = program_id
	gen rank_of_admission_orig = rank
	gen program_county_orig = program_county
	* munkarend is the schedule column of the applications file
	gen schedule_orig = munkarend
		
	keep id mu_sr mu1 mu2 mu3 mu4 *_real *_orig
	
	* Merge heuristics ---------------------------------------------------------
	* For each matching mu1-mu4, attach the application record of the assigned
	* contract and store its attributes with the suffix _h1 ... _h4. The merge
	* key and the id copies reuse the storage type of their source: a plain gen
	* would create a float, which rounds integers above 16,777,216 and can
	* break the match.
	forvalues i = 1/4 {
		local contract_type : type mu`i'
		gen `contract_type' contract_id = mu`i'
		
		* keep(1 3): keep every current row, matched or not; rows that exist
		* only in the applications file are not added.
		merge 1:1 id contract_id using "${data_applications}", keep(1 3)
		
		local contract_type : type contract_id
		local program_type : type program_id
		gen funding_h`i' = funding
		gen `contract_type' contract_id_h`i' = contract_id
		gen `program_type' program_id_h`i' = program_id
		gen rank_of_admission_h`i' = rank
		gen program_county_h`i' = program_county
		* munkarend is the schedule column of the applications file
		gen schedule_h`i' = munkarend
			
		* Drop the merged-in columns (and _merge) so the next pass starts clean
		keep id mu_sr mu1 mu2 mu3 mu4 *_real *_orig *_h*
	}
	
	* Define winners and losers (relative to *_orig) ---------------------------
	* Only heuristics 1-3 are compared with the benchmark. A smaller
	* rank_of_admission value counts as the better outcome.
	foreach algo in h1 h2 h3 {
		local new rank_of_admission_`algo'
		local base rank_of_admission_orig
		
		* Winner: has a rank under the heuristic but none under the benchmark,
		* or has a strictly smaller rank than under the benchmark
		gen winner_`algo' = (`new' != . & `base' == .) | ///
				(`new' < `base' & `new' != . & `base' != .)
				
		* Loser: has a rank under the benchmark but none under the heuristic,
		* or has a strictly larger rank than under the benchmark
		gen loser_`algo' = (`new' == . & `base' != .) | ///
				(`new' > `base' & `new' != . & `base' != .)
	}
	
	* Full-time indicator: 1 when the schedule code equals 1, 0 otherwise
	* (a missing schedule also gives 0)
	foreach version in orig h1 h2 h3 h4 {
		gen fulltime_`version' = schedule_`version' == 1
	}
	
compress

* Display name of each matching, keyed by the variable suffix it is stored under
local name_orig "benchmark"
local name_h1 "Greedy reject algo."
local name_h2 "Preference flip algo."
local name_h3 "Combined algo."
local name_h4 "SP-DA"

* Variables that exist only once
lab var id "Applicant id"
lab var contract_id_real "Contract id (realized)"
lab var funding_real "State-funded (1) or self-funded (2) contract (realized, 2007)"

* Variables that exist once per matching
foreach version in orig h1 h2 h3 h4 {
	local name "`name_`version''"
	lab var contract_id_`version' "Contract id (`name')"
	lab var program_id_`version' "Program id (`name')"
	lab var program_county_`version' "Program county (`name')"
	lab var schedule_`version' "Schedule (`name')"
	lab var funding_`version' "State-funded (1) or self-funded (2) contract (`name')"
	lab var fulltime_`version' "Matched to full-time program (`name')"
	lab var rank_of_admission_`version' "Rank of admission (`name')"
}

* Winner and loser flags exist for heuristics 1-3 only
foreach version in h1 h2 h3 {
	local name "`name_`version''"
	lab var winner_`version' "Winner (`name') relative to benchmark"
	lab var loser_`version' "Loser (`name') relative to benchmark"
}

* Final variable set; every variable not listed here is dropped before saving
keep id ///
	contract_id_real contract_id_orig contract_id_h* ///
	program_id_orig program_id_h* ///
	program_county_orig program_county_h* ///
	schedule_orig schedule_h* ///
	funding_real funding_orig funding_h* ///
	fulltime_orig fulltime_h* ///
	rank_of_admission_orig rank_of_admission_h* ///
	winner_h* loser_h*

save "${data_matching}", replace